* Start from an empty session (data, programs, stored results, matrices)
clear all
clear matrix

* Session settings: never pause long output; allow up to 15000 variables
set more off
set maxvar 15000

*--------------------------------------------------*
* PREPARE CENSUS DATA 
*--------------------------------------------------*

* Stack the 1960, 1980 and 2000 Census extracts, then keep US-born
* individuals whose birth cohort matches the Census year they appear in
    use "$CensusData/input/Census1960_1pct_all30to50.dta", clear

    forvalues yr = 1980(20)2000 {
        append using "$CensusData/input/Census`yr'_1pct_all30to50.dta"
    }

    * Sample restrictions
    * (a) US-born individuals (not including territories): birthplace code below 100
    keep if bpl < 100

    * (b) one 20-year birth cohort per Census year
    keep if (year==1960 & inrange(birthyr,1910,1929)) ///
          | (year==1980 & inrange(birthyr,1930,1949)) ///
          | (year==2000 & inrange(birthyr,1950,1969))
    
* Race and sex indicators
    * black: 1 if race==2, 0 if race==1, missing for every other race code.
    * Note: race==1 includes Caucasians and some respondents who identified
    * themselves as being Spanish in heritage
    gen black = cond(race==2, 1, cond(race==1, 0, .))
    tab black, m

    * white is the complement of black wherever black is defined
    gen white = !black if !missing(black)
    tab white, m

    * One dummy per category of sex; the first is renamed male and the
    * second female (the race x sex dummies below use sex==1 / sex==2 likewise)
    tab sex, gen(sex_)
    rename sex_1 male
    rename sex_2 female
    tab male, m
    tab female, m

    * Race x sex dummies, created in the order
    * white_male, white_female, nonw_male, nonw_female.
    * Each is missing when either black or sex is missing.
    foreach r in white nonw {
        local bval = ("`r'"=="nonw")              // value of black that defines the group
        foreach s in male female {
            local scode = 1 + ("`s'"=="female")   // sex code: 1 = male, 2 = female
            gen `r'_`s' = (black==`bval' & sex==`scode') if (black!=. & sex!=.)
            tab `r'_`s', m
        }
    }
    
 * Respondent has completed high school: educ code of 6 or above
 * (educ has no missing values)
    gen hs_ed = educ>=6
    tab hs_ed, m

 * College education: educ code of 10 or above (educ has no missing values)
    gen coll_ed = educ>=10
    tab coll_ed, m

 * Currently resides in the South: region codes 31 through 34.
 * Left missing when region is 90 or above (unknown).
    gen south_merge_son = (region>=31 & region<=34) if region<90
    tab south_merge_son, m

 * Born in the South

        /* South: Delaware, District of Columbia, Florida,
                  Georgia, Maryland, North Carolina,
                  South Carolina, Virginia, West Virginia,
                  Alabama, Kentucky, Mississippi,
                  Tennessee, Arkansas, Louisiana,
                  Oklahoma, Texas */

    * Birthplace codes are listed in the same order as the states above;
    * left missing when bpl is 99 ("US, na")
    gen south_merge = inlist(bpl, 10, 11, 12, ///
                                  13, 24, 37, ///
                                  45, 51, 54, ///
                                   1, 21, 28, ///
                                  47,  5, 22, ///
                                  40, 48) if bpl<99

    tab south_merge, m

* Marital status indicators (note: no missings in marst)
    * Married: marst code 1 or 2
    gen married = inlist(marst, 1, 2)
    tab married, m

    * Widowed: marst code 5
    gen widowed = marst==5
    tab widowed, m
    
* Total family income: clean, winsorize, then express in 1950 dollars

    * Step 1: codes of 9999998 and above mean not ascertained or N/A
    replace ftotinc = . if ftotinc>=9999998

    * Step 2: winsorize income (5% in tails); the result is stored in fam_inc_real
    * NOTE(review): this runs on the pooled, still-nominal incomes of all three
    * Census years, before deflating -- confirm that is intended
    winsor ftotinc if ftotinc<., p(0.05) gen(fam_inc_real)

    * Step 3: price indices used for deflating (1950 is the base year)
    gen CPI1950 = 24.1
    gen CPI1960 = 29.6
    gen CPI1980 = 82.4
    gen CPI2000 = 172.2

    * Step 4: rescale each Census year's income by CPI1950 / CPI<year>
    forvalues yr = 1960(20)2000 {
        replace fam_inc_real = fam_inc_real*(CPI1950/CPI`yr') if year==`yr' & fam_inc_real!=.
    }
    label var fam_inc_real "Family income, in 1950 dollars"

/* Rescale the person weight so that it has mean 1 within each Census year.
   Stored as wgt_sex_race for ease when creating the table. */
    gen wgt_sex_race = .
    forvalues yr = 1960(20)2000 {

        * Average person weight in this Census year
        summarize perwt if year==`yr'
        local mean_perwt = r(mean)

        * Divide by that average so the rescaled weight averages 1 in the year
        replace wgt_sex_race = perwt/`mean_perwt' if year==`yr'
    }

* Ranked version of family income (with weight)
    /* Note: There's always >100 obs in each birth year---
             no need to condition on >100 obs in birth year */

    * rank_R: weighted percentile (100 quantiles) of real family income,
    * computed separately within each birth year
    egen rank_R = xtile(fam_inc_real) if inrange(birthyr,1910,1979) & fam_inc_real!=., by(birthyr) nq(100) weight(wgt_sex_race)

    * ysel_R: birth year of every observation that received a rank
    quietly gen ysel_R = .
    replace ysel_R = birthyr if rank_R!=.

    * Placeholder column, all missing (for table)
    gen rank_father = .

    * Sanity check: nobody without income may carry a rank
    assert rank_R==. if fam_inc_real==.
    label var rank_R "Rank son, Census family income, baseline"

    * Keep the cleaned Census data in a temporary file for the table code below
    tempfile censuses
    save `censuses'
        
*---------------------------------------------------------------*
*---------------------------------------------------------------*

*--------------------------------------------------------------*
/* MAKE TABLE THAT COMPARES CHARACTERISTICS FOR CENSUS AND 
   SURVEY RESPONDENTS (BY GROUPED BIRTH DECADES) */
*--------------------------------------------------------------*
     
* 1. Set up table: open the .tex output (overwriting any existing file) and
*    write the tabularx preamble plus the two header rows
    file open Census_surveys_comp using "$Mydirectory2/appendix_a/Census_vs_surveys_stata2tex_uw.tex", write replace

    * One chained write; each quoted string followed by _n is one line of output
    file write Census_surveys_comp ///
        "\begin{tabularx}{\hsize}{@{\hskip\tabcolsep\extracolsep\fill} l c c c c c c}" _n ///
        "\toprule" _n ///
        "\addlinespace[0.5ex]" _n ///
        "& \multicolumn{2}{c}{1910--1929} & \multicolumn{2}{c}{1930--1949} & \multicolumn{2}{c}{1950--1969} \\" _n ///
        "\addlinespace[0.05ex]" _n ///
        "\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}" _n ///
        "\addlinespace[0.05ex]" _n ///
        "& Census & Survey & Census & Survey & Census & Survey \\" _n ///
        "\addlinespace[0.25ex]" _n ///
        "\midrule" _n
    
*-----------------*
*-----------------*

* 2-5. Panels A-D
*
* The four panels share one layout, so they are produced by a single helper
* program instead of four pasted copies. The helper loads each
* (Census year x data source) sample once per panel and caches every formatted
* cell in a local macro before writing. The previous code re-read both
* datasets from disk for every table row (60 loads per panel instead of 6).
* The text written to the .tex file is unchanged.

capture program drop census_survey_panel
program define census_survey_panel
    /* Write one panel of the Census-vs-survey comparison table to the open
       file handle Census_surveys_comp.

       Options (all required):
         title(string)      panel heading, e.g. "Panel A: White Men"
         sharevar(string)   race dummy whose mean forms the first row (white or black)
         sharelabel(string) label of that first row, e.g. "Share of Men"
         sexcode(string)    value of sex over which the share is computed (1 or 2)
         groupvar(string)   dummy selecting the panel's group, e.g. white_male
         censusfile(string) path of the cleaned Census dataset

       The program writes everything up to and including the cells of the
       "Observations" row. The caller writes that row's terminator and any
       rule that follows, because these differ between panels.

       Census statistics are weighted by wgt_sex_race; survey statistics are
       unweighted. The survey file is assumed to already contain `groupvar'
       and the row variables, since they are not created here
       -- TODO confirm against the script that builds it. */

    syntax , title(string) sharevar(string) sharelabel(string) ///
             sexcode(string) groupvar(string) censusfile(string)

    local fh Census_surveys_comp
    local rowvars age hs_ed coll_ed south_merge south_merge_son married widowed fam_inc_real rank_R

    *---- Pass 1: compute all cells, loading each sample exactly once ----*
    foreach y in 1960 1980 2000 {

        foreach x in census survey {

            * Bring in cleaned data and set the weight
            if "`x'"=="census" {
                local weights "[aw=wgt_sex_race]"

                use "`censusfile'", clear

                //keep the right Census year
                keep if year==`y'
            }
            else {
                local weights ""

                use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
                keep if baseline_sample==1

                ren rank_son_noweight rank_R //ranked son baseline, no weight

                * Dummy: white
                gen white = (black==0) if black<.
                tab white, m

                * Dummy: sex
                tab sex, gen(sex_)
                ren sex_1 male
                ren sex_2 female
                tab male, m
                tab female, m

                /*Keep the birth cohorts that mean the survey respondent
                  will be 30-50 in the given Census year */
                if `y'==1960 keep if inrange(dob,1910,1929)
                if `y'==1980 keep if inrange(dob,1930,1949)
                if `y'==2000 keep if inrange(dob,1950,1969)
            }

            * First row: mean of the race dummy among respondents of this sex.
            * Must run BEFORE the sample is cut down to the panel's group.
            sum `sharevar' if sex==`sexcode' `weights'
                local share =`r(mean)'
                local share_`y'_`x': display %-09.2fc `share'

            * Drop everyone outside the panel's group and count who is left
            keep if `groupvar'==1

            sum `groupvar' `weights'
            local nobs = `r(N)'
            local nobs_`y'_`x': display %-09.0fc `nobs'

            * Remaining rows: group means (income with 0 decimals, the rest with 2)
            local k = 0
            foreach var of local rowvars {
                local ++k

                sum `var' `weights'
                    local number =`r(mean)'

                    if "`var'"!="fam_inc_real" local cell_`k'_`y'_`x': display %-09.2fc `number'
                    else local cell_`k'_`y'_`x': display %-09.0fc `number'
            }
        }
    }

    *---- Pass 2: write the panel ----*
    file write `fh' "\addlinespace[1.5ex]" _n
    file write `fh' "\textit{`title'} & & & & & &  \\" _n

    * Share row (cells carry no trailing blank, as before)
    file write `fh' "\quad `sharelabel' "
    foreach y in 1960 1980 2000 {
        foreach x in census survey {
            file write `fh' "& `share_`y'_`x''"
        }
    }
    file write `fh' " \\" _n

    * One row per characteristic
    local k = 0
    foreach var of local rowvars {
        local ++k

        if "`var'"=="age" local label1 "Age"
        if "`var'"=="hs_ed" local label1 "High school graduate"
        if "`var'"=="coll_ed" local label1 "College graduate"
        if "`var'"=="south_merge" local label1 "Southern born/grew up"
        if "`var'"=="south_merge_son" local label1 "Resides in the South"
        if "`var'"=="married" local label1 "Married"
        if "`var'"=="widowed" local label1 "Widowed"
        if "`var'"=="fam_inc_real" local label1 "Family income, 1950\textdollar"
        if "`var'"=="rank_R" local label1 "Respondent rank"

        file write `fh' "\quad `label1' "
        foreach y in 1960 1980 2000 {
            foreach x in census survey {
                file write `fh' "& `cell_`k'_`y'_`x'' "
            }
        }
        file write `fh' " \\" _n
    }

    * Number of observations (row terminator is written by the caller)
    file write `fh' "\addlinespace[1ex]" _n
    file write `fh' "\midrule" _n
    file write `fh' "\addlinespace[0.5ex]" _n

    file write `fh' "Observations"
    file write `fh' "& `nobs_1960_census' & `nobs_1960_survey' & `nobs_1980_census' & `nobs_1980_survey' & `nobs_2000_census' & `nobs_2000_survey'"
end

* 2. Panel A: white men (2.5: observations row is written by the program)
    census_survey_panel, title("Panel A: White Men") sharevar(white) sharelabel("Share of Men") ///
        sexcode(1) groupvar(white_male) censusfile("`censuses'")
    file write Census_surveys_comp "\\ " _n
    file write Census_surveys_comp "\addlinespace[0.05ex]" _n
    file write Census_surveys_comp "\midrule" _n

*-----------------*
*-----------------*

* 3. Panel B: black men (3.5: observations row is written by the program)
    census_survey_panel, title("Panel B: Black Men") sharevar(black) sharelabel("Share of Men") ///
        sexcode(1) groupvar(nonw_male) censusfile("`censuses'")
    file write Census_surveys_comp " \\" _n
    file write Census_surveys_comp "\addlinespace[0.05ex]" _n
    file write Census_surveys_comp "\midrule" _n

*-----------------*
*-----------------*

* 4. Panel C: white women (4.5: observations row is written by the program)
    census_survey_panel, title("Panel C: White Women") sharevar(white) sharelabel("Share of Women") ///
        sexcode(2) groupvar(white_female) censusfile("`censuses'")
    file write Census_surveys_comp "\\ " _n
    file write Census_surveys_comp "\addlinespace[0.05ex]" _n
    file write Census_surveys_comp "\midrule" _n

*-----------------*
*-----------------*

* 5. Panel D: black women (5.5: observations row is written by the program)
*    No rule is written after this panel.
    census_survey_panel, title("Panel D: Black Women") sharevar(black) sharelabel("Share of Women") ///
        sexcode(2) groupvar(nonw_female) censusfile("`censuses'")
    file write Census_surveys_comp "\\ " _n
   
*-----------------*
*-----------------*

* 7. Finish table: closing rule and end of the tabularx environment, then
*    release the file handle (the last string is written without a newline)
    file write Census_surveys_comp ///
        "\addlinespace[0.05ex]" _n ///
        "\bottomrule" _n ///
        "\end{tabularx}" _n ///
        "\thispagestyle{empty}"

    file close Census_surveys_comp
